* This do file merges all survey and administrative school data to conduct empirical analysis

* 1. Load the cleaned 2017 baseline survey and link it to the master roll call.
*    Only students found in both files are retained; rc_merge stores the merge result.
use "$cleaned_dir/baseline_cleaned.dta", clear
merge 1:1 id using "$rawdata_dir/Master_rollcall/Master_rollcall_final.dta", ///
    keep(match) generate(rc_merge)

* 2. merge weekly attendance data of the Summer Program
*    No observations are dropped here, so records that appear only in the
*    attendance file stay in the data (ss_attndnc_merge==2).
merge 1:1 id using "$cleaned_dir/summer_attendance_cleaned.dta"
ren _merge ss_attndnc_merge

* students without an attendance record are coded as non-participants
replace summer_school = 0 if summer_school == .
label define summer_school 0 "Non-participants" 1 "Participants"
label values summer_school summer_school

* students without a summer-school language are coded 0 ("No summer school")
replace summer_language = 0 if summer_language == .
* capture: do not abort when no value label called summer_language exists yet
capture label drop summer_language
label define summer_language 0 "No summer school" 1 "Chichewa" 2 "English"
* attach the redefined label explicitly so the new 0 category is always displayed
label values summer_language summer_language

* 3. Summer Program weekly test data
*    (steps 3-5 drop nothing: unmatched records from either side are kept)
merge 1:1 id using "$cleaned_dir/summer_tests_cleaned.dta", generate(ss_test_merge)

* 4. Summer Program exit survey (participant survey)
*    currently matching 750 students out of 854.
merge 1:1 id using "$cleaned_dir/exitsurvey_cleaned.dta", generate(es_merge)

* 5. Parent survey of the summer school participants
merge 1:1 id using "$cleaned_dir/parent_cleaned.dta", generate(ps_merge)

* 6. 2017 follow-up survey
*    Follow-up records that have no counterpart in the master data are discarded.
merge 1:1 id using "$cleaned_dir/followup_cleaned.dta", ///
    keep(master match) generate(fs_merge)
* no_followup = 1 when the student has no follow-up record, 0 otherwise
generate no_followup = (fs_merge == 1)

* 7. 2016-2017 Academic Year Term 3 exam scores
*    (score records without a master observation are discarded)
merge 1:1 id using "$cleaned_dir/2016T3score_cleaned.dta", ///
    keep(master match) generate(bs_t3e_merge)

* 8. Summer School Teacher Survey, linked through summer_teacher_id
*    (many students per teacher; teachers without any student are discarded)
merge m:1 summer_teacher_id using "$cleaned_dir/teacher_cleaned.dta", ///
    keep(master match) generate(ts_merge)

* 9. merge the 2017 exam score
*    1:1 (rather than m:1) so that Stata verifies id is unique on both sides,
*    consistent with every other id-based merge in this file; the next merge
*    on id is already 1:1, so the master must be unique on id at this point.
merge 1:1 id using "$cleaned_dir/2017_exam_cleaned.dta"
* exam records without a master observation are discarded
drop if _merge==2
ren _merge t1e_merge

* merge the 2017-2018 2nd term random visit record (only random_1 is read);
* visit records without a master observation are discarded
merge 1:1 id using "$rawdata_dir/2017-2018 2term random visit/2017-2018 2 term random visit1.dta", ///
    keepusing(random_1) keep(master match) generate(rv_merge)

* numeric recode of the string code random_1: A=1, D=2, P=3, T=4, anything else missing
generate random_visit = cond(random_1=="A", 1, ///
                        cond(random_1=="D", 2, ///
                        cond(random_1=="P", 3, ///
                        cond(random_1=="T", 4, .))))
drop random_1

* 10. 2018 2nd term quiz & survey
*     (records without a master observation are discarded)
merge 1:1 id using "$cleaned_dir/2018_quiz_cleaned.dta", ///
    keep(master match) generate(f2_merge)

* 11. 2018 2nd term exam score
*     (records without a master observation are discarded)
merge 1:1 id using "$cleaned_dir/2018_exam_cleaned.dta", ///
    keep(master match) generate(t2e_merge)


*********************************************************************************
*********************************************************************************
* selection of analysis variables from the merged master data, grouped by source
*********************************************************************************
*********************************************************************************

*********************************************************************************
* 1. from the baseline survey
*********************************************************************************
* Each section below stores a variable list (names and wildcard patterns) in a
* local macro. The macros are expanded in the single keep command further down,
* so this part and the keep command must be run together in one execution.


* List of variables to be used in analysis: every variable starting with bs_
local bs_var "bs_*"
* (earlier, explicit version of the list, kept for reference)
// local bs_var "bs_age bs_fatherlive- bs_chewalang bs_schtravel-bs_studywkdays bs_studywkends_time-bs_studywkdays_home bs_studywkdays_companya- bs_studywkends bs_studywkdays_time- bs_studywkends_home bs_studywkdays_companya- bs_work_home bs_workdays bs_worktime- bs_studysummer_days bs_studysummer_time- bs_studysummer_home bs_studysummer_companya- bs_worksummer bs_grit bs_grit_impute bs_selfesteem bs_selfesteem_impute bs_conscientious- bs_listen_engz"


*********************************************************************************
* 2. from the summer school attendance data
*********************************************************************************



* List of variables to be used in analysis
local summer_attend_var "num_attend rate_attend summer_school summer_language"

*********************************************************************************
* 3. from the summer school test data
*********************************************************************************



* List of variables to be used in analysis: all math_ and social_ variables plus next_grade
local summer_test_var "math_* social_* next_grade"



*********************************************************************************
* 4. from the summer school exit survey data (summer school participants)
*********************************************************************************

* List of variables to be used in analysis
local es_var "es_q103_dum es_q105_chi es_q105_eng es_q105_both es_q106 es_q107_dum es_q108_chi es_q108_eng es_q109_chi es_q109_eng es_q110 es_q111_dum es_q112 es_q113_chi es_q113_eng"


*********************************************************************************
* 5. from the 2017 follow-up survey data
*********************************************************************************


* List of variables to be used in analysis: the no_followup flag plus every fs_ variable
local fs_var "no_followup fs_*"


*********************************************************************************
* 6. from the summer school participants' parent survey data
*********************************************************************************

* List of variables to be used in analysis: every variable starting with ps_q
local ps_var "ps_q*"


*********************************************************************************
* 7. from the 2016-2017 academic year Term 3 exam score data
*********************************************************************************

* List of variables to be used in analysis
* NOTE(review): this is the same pattern as bs_var, so it adds nothing new to the
* keep list; confirm the Term 3 score variables really carry the bs_ prefix.
local t3_var "bs_*"


*********************************************************************************
* 8. from the summer school teacher survey data
*********************************************************************************

* List of variables to be used in analysis
local ts_var "ts_age ts_language ts_female ts_totexp tfu_q201 tfu_q202_dum tfu_q204_dum tfu_tch_score_z tfu_q210_dum tfu_q211_chi tfu_q211_both tfu_q211_eng tfu_q212_chi tfu_q212_both tfu_q212_eng tfu_q213_chi tfu_q213_both tfu_q213_eng tfu_q211_cor tfu_q212_cor tfu_q213_cor tfu_q214_dum tfu_q218_dum tfu_q219_dum tfu_q220_dum tfu_q221_dum tfu_q222_dum tfu_q223_dum tfu_q224_dum tfu_q225_dum tfu_q226_dum tfu_q227_dum tfu_q228_dum tfu_q229_dum tfu_q230_dum tfu_fb_score_z"

*********************************************************************************
* 9. 2017 Term Exam scores
*********************************************************************************

* List of variables to be used in analysis
* NOTE(review): the trailing pattern *z matches EVERY variable whose name ends in z,
* from any source, not only the 2017 exam z-scores; confirm this is intended.
local test17_var "fs_eng fs_math fs_chi fs_ssbk fs_sci fs_art_life fs_total_std4 fs_total_std5 fs_total_std6 fs_pass fs_score_adj *z"

*********************************************************************************
* 10. 2018 2nd Term Quiz scores & Survey
*********************************************************************************

* List of variables to be used in analysis
local quiz18_var "f2_std f2_quiz_* f2_survey_* f2_like_study_total* f2_study_hard_total* f2_study_frnd_total* f2_study_parents_total* f2_*_dum "

*********************************************************************************
* 11. 2018 2nd Term Final Exam
*********************************************************************************

* List of variables to be used in analysis
local exam18_var "t2e_std_progress t2e_std_exam t2e_mark_math* t2e_grd_math t2e_mark_social* t2e_grd_social t2e_mark_bible* t2e_grd_bible t2e_mark_eng* t2e_grd_eng t2e_mark_chi* t2e_grd_chi t2e_mark_sci* t2e_grd_sci t2e_mark_art* t2e_grd_art t2e_mark_total* t2e_grd_total t2e_passorfail t2e_math5_total* t2e_social5_total* t2e_math6_total* t2e_social6_total* t2e_*_totalz t2e_math5_total_calc* t2e_social5_total_calc* t2e_math6_total_calc* t2e_social6_total_calc* t2e_*_total_calcz t2e_math5_total_summer* t2e_social5_total_summer* t2e_math6_total_summer* t2e_social6_total_summer* t2e_*_total_summerz t2e_math5_total_nosummer t2e_social5_total_nosummer t2e_math6_total_nosummer t2e_social6_total_nosummer t2e_*_total_nosummerz"


********************************************************************************************
* Restrict the data to the identifiers, all merge indicators (*_merge) and the
* analysis variables collected in the locals defined above
keep id school_code std class gender female old_std lang3 summeroffer3 ///
    transfer transferto new_std repeat random_visit *_merge ///
    `bs_var' `summer_attend_var' `summer_test_var' `es_var' `fs_var' `ps_var' ///
    `t3_var' `test17_var' `ts_var' `quiz18_var' `exam18_var'
********************************************************************************************

* shrink storage types to reduce the size of the data
compress

***********************************************************************************************
* Derived variables
***********************************************************************************************
* Progression outcomes built from the string variable repeat.
* Each indicator is 0 for every other value of repeat, including an empty string.
gen dropout=(repeat=="drop" )
gen repeated=(repeat=="repeat" )
gen no_progress=(dropout==1 | repeated==1)

* Indicators for random-visit codes 2 and 3.
* NOTE(review): both are 0 (not missing) when random_visit is missing - confirm intended.
gen random_drop=(random_visit==2)
gen random_present=(random_visit==3)

* zone, derived from school_code: 1 = below 22, 2 = 22 to below 33, 3 = 33 and above.
* missing() is used instead of "!= ." so that extended missing values (.a-.z),
* which sort above every number, leave zone missing instead of falling into zone 3.
gen zone=1 if !missing(school_code)
replace zone=2 if school_code >= 22 & school_code < 33
replace zone=3 if school_code >= 33 & !missing(school_code)


* save the Master data file
save "$cleaned_dir/language_master.dta", replace

